setwd("/Users/Kadeem Gilbert/Desktop/Microbiome project/R_analysis")

library(vegan)
library(beanplot)
library(FSA)
library(dunn.test)

#'diversity' function for Shannon index, e.g. diversity(eotu.t)
#ediv<-diversity(eotu.t)
#bdiv<-diversity(botu.t)
#write.table(ediv,"C:/Users/Kadeem Gilbert/Desktop/ediv.txt",sep="\t")
#write.table(bdiv,"C:/Users/Kadeem Gilbert/Desktop/bdiv.txt",sep="\t")


alpha <- read.table("Malpha.txt",sep="\t",head=T,row.names=1,na.strings="?") #read in alpha diversity data, HP or M

mdat <- read.table("MicrobiomeMetadata.txt", sep="\t", head=T, row.names=1) #get metadata
mdat <- mdat[order(row.names(mdat)),] # order row names
mdat.alpha <- subset(mdat, row.names(mdat) %in% row.names(alpha)) # subset, because metadata had more samples
rownames(mdat.alpha) == rownames(alpha) # sanity check

# drop any categories that are no longer in the subsetted metadata file
mdat.alpha$Species <- mdat.alpha$Species[drop=T]
mdat.alpha$Canopy <- mdat.alpha$Canopy[drop=T]
mdat.alpha$Width <- mdat.alpha$Width[drop=T]
mdat.alpha$Length <- mdat.alpha$Length[drop=T]
mdat.alpha$Fluid_Volume <- mdat.alpha$Fluid_Volume[drop=T]
mdat.alpha$pH <- mdat.alpha$pH[drop=T]
mdat.alpha$Elevation <- mdat.alpha$Elevation[drop=T]
mdat.alpha$Plant <- mdat.alpha$Plant[drop=T]
mdat.alpha$Pitcher <- mdat.alpha$Pitcher[drop=T]
mdat.alpha$Fluid_Color <- mdat.alpha$Fluid_Color[drop=T]
mdat.alpha$Viscosity <- mdat.alpha$Viscosity[drop=T]
mdat.alpha$Prey <- mdat.alpha$Prey[drop=T]

#Add "effective number of species" columns
alpha
alpha$efctv.b <- exp(alpha$shannon.b)
alpha$efctv.e <- exp(alpha$shannon.e)
alpha$efctv.e.noMet <- exp(alpha$shannon.e.noMet)

#assess normality
library(nortest)
pearson.test(mdat.alpha$pH) #non-normal
pearson.test(mdat.alpha$Fluid_Volume) #non-normal
pearson.test(alpha$shannon.b) #non-normal (p=0.008896)
pearson.test(alpha$shannon.e) #normal (p=0.299)

#do everything in one glm 
summary(m1a <- glm(alpha$shannon.b ~ Elevation+pH+Percent_Open+Fluid_Volume+Length+Morph+Pitcher_Color, data = mdat.alpha))#only pH ***
summary(m1a <- glm(alpha$shannon.e ~ Elevation+pH+Percent_Open+Fluid_Volume+Length+Morph+Pitcher_Color, data = mdat.alpha))#only morph **
mdat.alpha$Morph<-factor(mdat.alpha$Morph, levels=c("Lower", "Upper"))
plot(alpha$shannon.e~mdat.alpha$Morph,col="grey25",xlab="Pitcher Morph",ylab="Shannon Index",main="Eukaryotic Alpha Diversity",cex.lab=2.5, cex.axis=2.5, cex.main=2.5)#plot it
summary(m1a <- glm(alpha$shannon.e.noMet ~ Elevation+pH+Percent_Open+Fluid_Volume+Length+Morph+Pitcher_Color, data = mdat.alpha))#without Metazoa, only pH *

#investigate correlation between bacterial and eukaryotic alpha diversity
plot(shannon.b~shannon.e,data=alpha,ylab="Eukaryotic Shannon Index",xlab="Bacterial Shannon Index",main="Alpha Diversity")
summary(lm(alpha$shannon.e~alpha$shannon.b)) #nonsig, p=0.09
summary(lm(alpha$shannon.e.noMet~alpha$shannon.b))
plot(shannon.b~shannon.e.noMet,data=alpha,ylab="Eukaryotic Shannon Index",xlab="Bacterial Shannon Index",main="Alpha Diversity without Metazoa") #without Metazoa, eukaryotic alpha diversity increases with bacterial alpha diversity, r2=0.2477,p=0.0057**

########################################################
#Abundance for counts
setwd("/Users/Kadeem Gilbert/Desktop/Microbiome project/")
counts<-read.table("CountData.txt",sep="\t",header=T,row.names=1,na.strings="?")

#richness by Elevation, observe alpha=0.05/3
plot(counts$Ant_sp~counts$Elevation,xlab="Elevation (m asl)",ylab="Counts",main="Ant Morphospecies Richness")
summary(glm(counts$Ant_sp~counts$Elevation,family=poisson))
#p=0.22, absolutely no visible trend (z=1.234)
#culicids?
plot(counts$Culicid_sp~counts$Elevation,xlab="Elevation (m asl)",ylab="Counts",main="Culicid Morphospecies Richness")
summary(glm(counts$Culicid_sp~counts$Elevation,family=poisson))
#p=0.21, absolutely no visible trend (z=-1.253)
#insect orders?
plot(counts$Prey_orders~counts$Elevation,xlab="Elevation (m asl)",ylab="Number of Orders",main="Non-Ant Prey Richness")
summary(glm(counts$Prey_orders~counts$Elevation,family=poisson))
#p=0.9268

#glmm
require(glmmADMB)
require(lme4)

summary(m1a <- glm(Culicids ~ Elevation+pH+Percent_Open+Fluid_Volume+Length+Morph+Color, data = counts, family = poisson))#multiple regression, do for each taxon

###Compare sequence and count data###################################

cdat <- counts
cdat <- cdat[order(row.names(cdat)),] # order row names
cdat.otus <- subset(cdat, row.names(cdat) %in% row.names(otus)) # subset, because metadata had more samples
rownames(cdat.otus) == rownames(otus) # sanity check

plot(log(otus$Culicidae+1)~log(cdat.otus$Culicids+1),ylab="Counts",xlab="Sequences",main="Culicidae")
plot(log(otus$Ceratopogonidae+1)~log(cdat.otus$Ceratopogonids+1),ylab="Counts",xlab="Sequences",main="Ceratopogonidae")
plot(log(otus$Acari+1)~log(cdat.otus$Mites+1),ylab="Counts",xlab="Sequences",main="Acari")
plot(log(otus$Insecta_noNematocera+1)~log(cdat.otus$OtherInsects+1),ylab="Counts",xlab="Sequences",main="Insects other than ants and nematocerans")

#only nonzero counts
plot(log(otus$Culicidae[cdat.otus$Culicids>0]+1)~log(cdat.otus$Culicids[cdat.otus$Culicids>0]+1)) #doesnt't change significance
boxplot(log(otus$Culicidae+1)~otus$pH_group,col="skyblue1",main="Culicidae",xlab="pH",ylab="log abundance")
boxplot(log(counts$Culicids+1)~otus$pH_group,col="skyblue1",main="Culicids",xlab="pH",ylab="log abundance")
#they both recover relevant patterns however; kruskal results similar too, chisquare~7 p~0.02




